Let us set some global options for all code chunks in this document.
knitr::opts_chunk$set(
message = FALSE, # Disable messages printed by R code chunks
warning = FALSE, # Disable warnings printed by R code chunks
echo = TRUE, # Show R code within code chunks in output
include = TRUE, # Include both R code and its results in output
eval = TRUE, # Evaluate R code chunks
cache = FALSE, # Enable caching of R code chunks for faster rendering
fig.align = "center",
out.width = "100%",
retina = 2,
error = TRUE,
collapse = TRUE
)
rm(list = ls())
set.seed(1982)Let us now load some required libraries.
# Load required libraries
# inla.upgrade(testing = TRUE)
# remotes::install_github("inlabru-org/inlabru", ref = "devel")
# remotes::install_github("davidbolin/rspde", ref = "devel")
# remotes::install_github("davidbolin/metricgraph", ref = "devel")
# remotes::install_github("davidbolin/ngme2", ref = "devel")
library(INLA)
inla.setOption(num.threads = 6)
library(inlabru)
library(rSPDE)
library(MetricGraph)
library(ngme2)
library(plotly)
library(dplyr)
library(sf)
library(here)Function standarize() below is later used to standardize
the covariate SpeedLimit.
We load the graph object sf_graph (which only contains
weights) and the data (already graph-processed).
timeallprocedure <- Sys.time()
load(here("Graph_objects/graph_construction_19MAY24_FRC0134.RData"))
load(here("Data_files/data_day7142128_hour13_with_no_consecutive_zeros_19MAY24_FRC0134_graph_processed.RData"))
data_on_graph = data_on_graph %>%
dplyr::select(-datetime)The following commands remove zero speed observations that are 1m away from the graph, and after that, they remove any speed observations that are 3m away from the graph.
to_remove = data_on_graph %>%
filter(speed == 0, .distance_to_graph > 0.001)
data_on_graph = setdiff(data_on_graph, to_remove) %>%
filter(.distance_to_graph <= 0.003)We add data to the graph.
We get the values of the weights at data locations. This essentially gives us covariates from the weights.
When running
sf_graph$edgeweight_to_data(data_loc = TRUE), some
NA values are created (because the data is grouped). We
remove them below. We also standardize the SpeedLimit
covariate.
We add the data again but now with the new standardized
SpeedLimit covariate.
We get the value of the weights at mesh locations. This will allow us
to built matrices B.sigma and B.range below.
Again,
sf_graph$edgeweight_to_data(mesh = TRUE, add = FALSE, return = TRUE)
creates repeated information (because the data is grouped). We fix that
by filtering one group. We also standardize the SpeedLimit
covariate.
nonstat.time.ini <- Sys.time()
sigma <- 1.3
range <- 0.15
nu <- 0.5
sigma.e <- 0.1
rspde.order <- 1
B.sigma = cbind(0, 1, 0, mesh$SpeedLimit, 0)
B.range = cbind(0, 0, 1, 0, mesh$SpeedLimit)
cov_nonstat <- rSPDE::spde.matern.operators(graph = sf_graph,
parameterization = "matern",
B.sigma = B.sigma,
B.range = B.range,
theta = c(sigma, range, sigma, range),
nu = nu)$covariance_mesh()
L <- chol(cov_nonstat)
u_nonstat <- t(L)%*%rnorm(dim(cov_nonstat)[1])stat.time.ini <- Sys.time()
rspde_model_stat <- rspde.metric_graph(sf_graph,
parameterization = "matern",
nu = nu)
data_rspde_bru_stat <- graph_data_rspde(rspde_model_stat,
repl = ".all",
loc_name = "loc")
cmp_stat = speed ~ -1 +
Intercept(1) +
SpeedLimit +
field(loc, model = rspde_model_stat,
replicate = data_rspde_bru_stat[["repl"]])
rspde_fit_stat <-
bru(cmp_stat,
data = data_rspde_bru_stat[["data"]],
family = "gaussian",
options = list(verbose = FALSE)
)## Time difference of 2.27779 mins
## inlabru version: 2.10.1.9007
## INLA version: 24.05.18-2
## Components:
## Intercept: main = linear(1), group = exchangeable(1L), replicate = iid(1L)
## SpeedLimit: main = linear(SpeedLimit), group = exchangeable(1L), replicate = iid(1L)
## field: main = cgeneric(loc), group = exchangeable(1L), replicate = iid(data_rspde_bru_stat[["repl"]])
## Likelihoods:
## Family: 'gaussian'
## Data class: 'metric_graph_data', 'list'
## Predictor: speed ~ .
## Time used:
## Pre = 0.627, Running = 19.1, Post = 4.32, Total = 24
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## Intercept 0.035 0.153 -0.266 0.035 0.335 0.035 0
## SpeedLimit 0.955 0.009 0.938 0.955 0.972 0.955 0
##
## Random effects:
## Name Model
## field CGeneric
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for the Gaussian observations 100.14 1.396 97.40 100.14
## Theta1 for field 5.39 0.008 5.38 5.39
## Theta2 for field -8.07 0.018 -8.11 -8.06
## 0.975quant mode
## Precision for the Gaussian observations 102.90 100.16
## Theta1 for field 5.41 5.39
## Theta2 for field -8.04 -8.05
##
## Deviance Information Criterion (DIC) ...............: -30255.43
## Deviance Information Criterion (DIC, saturated) ....: 41303.55
## Effective number of parameters .....................: 15457.13
##
## Watanabe-Akaike information criterion (WAIC) ...: -33455.90
## Effective number of parameters .................: 9218.52
##
## Marginal log-Likelihood: -54298.15
## is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
## mean sd 0.025quant 0.5quant 0.975quant mode
## std.dev 2.20161e+02 1.70109e+00 2.17522e+02 2.19920e+02 2.24025e+02 2.19028e+02
## range 3.13880e-04 5.71950e-06 2.90242e-04 3.24908e-04 3.24908e-04 3.19394e-04
init.vec.theta = c(fit.rspde$summary.log.std.dev$mode,
fit.rspde$summary.log.range$mode,
rep(0, (ncol(B.sigma)-3)))
rspde_model_nonstat <- rspde.metric_graph(sf_graph,
B.sigma = B.sigma,
B.range = B.range,
parameterization = "matern",
nu = nu)
data_rspde_bru_nonstat <- graph_data_rspde(rspde_model_nonstat,
repl = ".all",
loc_name = "loc")
cmp_nonstat = speed ~ -1 +
Intercept(1) +
SpeedLimit +
field(loc, model = rspde_model_nonstat,
replicate = data_rspde_bru_nonstat[["repl"]])
rspde_fit_nonstat <-
bru(cmp_nonstat,
data = data_rspde_bru_nonstat[["data"]],
family = "gaussian",
options = list(verbose = FALSE)
)## Time difference of 18.38253 mins
## inlabru version: 2.10.1.9007
## INLA version: 24.05.18-2
## Components:
## Intercept: main = linear(1), group = exchangeable(1L), replicate = iid(1L)
## SpeedLimit: main = linear(SpeedLimit), group = exchangeable(1L), replicate = iid(1L)
## field: main = cgeneric(loc), group = exchangeable(1L), replicate = iid(data_rspde_bru_nonstat[["repl"]])
## Likelihoods:
## Family: 'gaussian'
## Data class: 'metric_graph_data', 'list'
## Predictor: speed ~ .
## Time used:
## Pre = 0.387, Running = 23.2, Post = 2.98, Total = 26.6
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## Intercept -0.001 0.007 -0.015 -0.001 0.014 -0.001 0
## SpeedLimit 0.953 0.004 0.945 0.953 0.961 0.953 0
##
## Random effects:
## Name Model
## field CGeneric
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for the Gaussian observations 100.265 1.454 97.102 100.367
## Theta1 for field 1.475 0.023 1.429 1.476
## Theta2 for field 0.524 0.047 0.427 0.526
## Theta3 for field 1.057 0.025 1.015 1.055
## Theta4 for field -0.347 0.051 -0.432 -0.351
## 0.975quant mode
## Precision for the Gaussian observations 102.749 100.915
## Theta1 for field 1.519 1.479
## Theta2 for field 0.614 0.532
## Theta3 for field 1.112 1.044
## Theta4 for field -0.234 -0.372
##
## Deviance Information Criterion (DIC) ...............: -32216.17
## Deviance Information Criterion (DIC, saturated) ....: 39358.89
## Effective number of parameters .....................: 13463.75
##
## Watanabe-Akaike information criterion (WAIC) ...: -34313.36
## Effective number of parameters .................: 8649.63
##
## Marginal log-Likelihood: -13190.00
## is computed
## Posterior summaries for the linear predictor and the fitted values are computed
## (Posterior marginals needs also 'control.compute=list(return.marginals.predictor=TRUE)')
## mean sd 0.025quant 0.5quant 0.975quant mode
## Theta1.matern 1.475480 0.0229099 1.428510 1.476120 1.518630 1.478960
## Theta2.matern 0.524409 0.0474374 0.427138 0.525725 0.613737 0.531623
## Theta3.matern 1.056740 0.0251363 1.014760 1.054640 1.112240 1.044220
## Theta4.matern -0.346695 0.0510541 -0.431910 -0.350967 -0.233932 -0.372196
#load(here("Models_output/distmatrixfixed.RData"))
points = data_final %>%
as.data.frame() %>%
mutate(., index = 1:nrow(.)) %>%
dplyr:::select(speed, .group, index) %>%
mutate(.group = as.numeric(.group)) %>%
group_by(.group) %>%
mutate(indexingroup = seq_len(n())) %>%
ungroup()
distance = seq(from = 0, to = 200, by = 20)/1000The code of chunk below was executed only one time.
{r}
load(here("Models_output/distmatrixfixed_19May24.RData"))
points = data_final %>%
as.data.frame() %>%
mutate(., index = 1:nrow(.)) %>%
dplyr:::select(speed, .group, index) %>%
mutate(.group = as.numeric(.group)) %>%
group_by(.group) %>%
mutate(indexingroup = seq_len(n())) %>%
ungroup()
distance = seq(from = 0, to = 200, by = 20)/1000
GROUPS <- list()
for (j in 1:length(distance)) {
print(j)
GROUPS[[j]] = list()
for (i in 1:nrow(points)) {
rowi = points[i, ]
which.in.group <- which(as.vector(distmatrixlist[[rowi$.group]][rowi$indexingroup,]) <= distance[j])
GROUPS[[j]][[i]] <- filter(points, .group == rowi$.group)[which.in.group, ]$index
}
}
save(GROUPS, file = here("Models_output/GROUPS_19May24_corrected.RData"))
The code of chunk above was executed only one time.
ggplot(data_final, aes(x = .coord_x, y = .coord_y, color = .group)) +
geom_point(size = 0.1) +
facet_wrap(~ .group) +
theme_minimal() +
labs(title = "Scatter Plot of Points by Groups",
x = "X-axis Label",
y = "Y-axis Label")indexofinterest <- 21345
pointofinterest <- GROUPS[[11]][[indexofinterest]]
windowofinterest <- data_final[pointofinterest, ] |> as.data.frame() |> st_as_sf(coords = c(".coord_x", ".coord_y"), crs = 4326)
bbox <- st_bbox(windowofinterest)
polygon <- st_polygon(list(rbind(
c(bbox["xmin"], bbox["ymin"]),
c(bbox["xmax"], bbox["ymin"]),
c(bbox["xmax"], bbox["ymax"]),
c(bbox["xmin"], bbox["ymax"]),
c(bbox["xmin"], bbox["ymin"])
))) |>
st_sfc(crs = st_crs(windowofinterest))
data_final_filtered <- data_final %>%
as.data.frame() %>%
st_as_sf(coords = c(".coord_x", ".coord_y"), crs = 4326) %>%
st_filter(x = ., y = polygon, .predicate = st_within)
ggplot() +
geom_sf(data = data_final_filtered, aes(color = .group))
ggplot() +
geom_sf(data = data_final_filtered, aes(color = .group)) +
geom_point(data = data_final[pointofinterest, ], aes(x = .coord_x, y = .coord_y), color = "blue") +
geom_point(data = data_final[indexofinterest, ], aes(x = .coord_x, y = .coord_y), color = "red")
ggplot() +
geom_point(data = data_final[pointofinterest, ], aes(x = .coord_x, y = .coord_y), color = "blue") +
geom_point(data = data_final[indexofinterest, ], aes(x = .coord_x, y = .coord_y), color = "red")
ggplot() +
geom_point(data = filter(data_final, .group == data_final[indexofinterest, ]$.group), aes(x = .coord_x, y = .coord_y), color = "black") +
geom_point(data = data_final[pointofinterest, ], aes(x = .coord_x, y = .coord_y), color = "blue") +
geom_point(data = data_final[indexofinterest, ], aes(x = .coord_x, y = .coord_y), color = "red")mse.stat <- mse.nonstat <- ls.stat <- ls.nonstat <- rep(0,length(distance))
# cross-validation for-loop
for (j in 1:length(distance)) {
print(j)
# cross-validation of the stationary model
cv.stat <- inla.group.cv(rspde_fit_stat, groups = GROUPS[[j]])
# cross-validation of the nonstationary model
cv.nonstat <- inla.group.cv(rspde_fit_nonstat, groups = GROUPS[[j]])
# obtain MSE and LS
mse.stat[j] <- mean((cv.stat$mean - data_sim_nonstat$speed)^2)
mse.nonstat[j] <- mean((cv.nonstat$mean - data_sim_nonstat$speed)^2)
ls.stat[j] <- mean(log(cv.stat$cv))
ls.nonstat[j] <- mean(log(cv.nonstat$cv))
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## plot results
par(mfrow = c(2,2), family = "Palatino")
# Plot MSE
plot(distance, mse.stat, main = "MSE", ylim = c(min(mse.nonstat, mse.stat), max(mse.nonstat, mse.stat)),
type = "l", ylab = "MSE", xlab = "distance in m", col = "black")
lines(distance, mse.nonstat, col = "blue")
legend("bottomright", legend = c("Stationary", "Non-stationary"), col = c("black", "blue"), lty = 1)
# Plot log-score
plot(distance, -ls.stat, main = "log-score", ylim = c(min(-ls.nonstat, -ls.stat), max(-ls.nonstat, -ls.stat)),
type = "l", ylab = "log-score", xlab = "distance in m", col = "black")
lines(distance, -ls.nonstat, col = "blue")
legend("bottomright", legend = c("Stationary", "Non-stationary"), col = c("black", "blue"), lty = 1)